Process from counts matrix to ready for downstream analysis
In this phase we do final filtering, binarization, TF-IDF, SVD, UMAP for scATAC data based on binarized count over peaks identified over clades in phases 1-3, extract transcript expression signal from scATAC data, plot marker genes, find tentative cluster ids, integrate with scRNA etc…
plan("multisession", workers = 6)
options(future.globals.maxSize = 12 * 1024 ^ 3)
#setwd("/Volumes/ExtSSD/ExtraWorkspace/E12R1/scATAC_data/")
sample.name <- "E12_R1"
run.date <- "240322"
# Reading scRNA data rds object
#s.data_rna <- readRDS("../scRNA/E14_DI_scRNAseq_neurons_clean.rds")
#cluster_names<-read_tsv("../scRNA/E14_DI_scRNAseq_cleaned_top20_allclusters.csv")
#cluster_id_name <- distinct(cluster_names[,c("cluster","ClusterName")])->cluster_id_name
#scRNA_clean_markers_file <- "../scRNA/E14_DI_scRNAseq_cleaned_top20_allclusters.csv"
s.data <- readRDS(paste("../scATAC_data/",sample.name,".merged.peaks.271021.Rds",sep=""))
s.data_RNA <- readRDS("../scRNA_data/e12_ens_whole_rescaled.Rds")
Warning in gzfile(file, "rb") :
cannot open compressed file '../scRNA_data/e12_ens_whole_rescaled.Rds', probable reason 'No such file or directory'
Error in gzfile(file, "rb") : cannot open the connection
DefaultAssay(s.data) <- 'peaks'
set.seed(2020)
s.data <- RunTFIDF(s.data, method=3)
Performing TF-IDF normalization
s.data <- FindTopFeatures(s.data, min.cutoff = 'q25')
s.data <- RunSVD(
object = s.data,
assay = 'peaks',
reduction.key = 'LSI_',
reduction.name = 'lsi'
)
Running SVD
Scaling cell embeddings
DepthCor(s.data)
Plotting read depth correlation plot, in Seurat pipeline usually first PC1 is omitted because of this association.
s.data <- RunUMAP(object = s.data, reduction = 'lsi', dims = 2:30, min.dist=0.05, spread=1.1)
Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
15:06:16 UMAP embedding parameters a = 1.506 b = 0.8373
15:06:16 Read 4952 rows and found 29 numeric columns
15:06:16 Using Annoy for neighbor search, n_neighbors = 30
15:06:16 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:06:17 Writing NN index file to temp file /var/folders/yb/wvk8t07s719136klxnc8nmp9sglf93/T//RtmpDm8Ztu/filef67927c6cd96
15:06:17 Searching Annoy index using 6 threads, search_k = 3000
15:06:17 Annoy recall = 100%
15:06:18 Commencing smooth kNN distance calibration using 6 threads
15:06:20 Initializing from normalized Laplacian + noise
15:06:21 Commencing optimization for 500 epochs, with 187612 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:06:28 Optimization finished
s.data <- FindNeighbors(object = s.data, reduction = 'lsi', dims = 2:30)
Computing nearest neighbor graph
Computing SNN
s.data <- FindClusters(object = s.data, verbose = FALSE, algorithm = 4, resolution = 1)
DimPlot(object = s.data, label = TRUE, pt.size=1.2) + NoLegend()
DimPlot(object = s.data, label = TRUE, pt.size=.5,group.by="replicate", shuffle =TRUE) + NoLegend()
# Compute gene activities
# Now that scRNA data has been processed with ENSMUSG ids this now longer works directly like this.
# Let's extract genomic annotation metadata from s.data[['peaks']]
genomic.metadata <- mcols(Annotation(s.data[['peaks']]))
# Generate conversion table from gene_name to gene_id
gene_name2gene_id <- as_tibble(genomic.metadata[,c("gene_name","gene_id")])
# Calculate gene activity estimate from scATAC reads based on the scATAC, by using gene_names as function does not support any other id
gene.activities <- GeneActivity(s.data, assay="peaks")
Extracting gene coordinates
Extracting reads overlapping genomic regions
Extracting reads overlapping genomic regions
# Store gene_names
gene.names <- rownames(gene.activities)
# Switch sparse matrix to use ensmusg id
ensmusg.ids <- gene_name2gene_id[match(gene.names,pull(gene_name2gene_id,"gene_name")),] %>% pull("gene_id")
gene_names <- gene_name2gene_id[match(gene.names,pull(gene_name2gene_id,"gene_name")),] %>% pull("gene_name")
# Dropping NAs
non.na.i <- !is.na(ensmusg.ids)
gene.activities.gene_id <- gene.activities[non.na.i,]
rownames(gene.activities.gene_id) <- ensmusg.ids[non.na.i]
# Add the gene activity matrix to the Seurat object as a new assay
s.data[['Activity']] <- CreateAssayObject(counts = gene.activities.gene_id)
s.data <- NormalizeData(
object = s.data,
assay = 'Activity',
normalization.method = 'LogNormalize',
scale.factor = median(s.data$nCount_Activity)
)
# Add gene_name to gene_id mapping into the s.data[['Activity']] assays metadata
s.data[['Activity']]<- AddMetaData(s.data[['Activity']], col.name = "feature_symbol", metadata = gene_names[non.na.i])
#DimPlot(object = s.data, label = TRUE, pt.size=1.2,group.by="clade") + NoLegend()
neuronal.markers<- read_tsv("../../CellAnnotation/E12.5_cluster_markers_for_ATACseq.txt", col_names = c("annotation","geneName"))
Rows: 73 Columns: 2
── Column specification ──────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): annotation, geneName
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
feature.metadata <- s.data[['Activity']][[]] %>% rownames_to_column(var="gene_id") %>% as_tibble()
neuronal.markers.tmp <- filter(feature.metadata, feature_symbol %in% neuronal.markers$geneName)
neuronal.markers.ids <- pull(neuronal.markers.tmp,"gene_id")
neuronal.markers.names <- pull(neuronal.markers.tmp,"feature_symbol")
DefaultAssay(s.data) <- 'Activity'
f.plot.tmp <- FeaturePlot(
object = s.data,
features = neuronal.markers.ids,
pt.size = 0.1,
max.cutoff = 'q95',
combine = F
)
f.plots.1 <- lapply(1:length(f.plot.tmp),function(i){
f.plot.tmp[[i]] + labs(title=neuronal.markers.names[i])
})
patchwork::wrap_plots(f.plots.1)
Plot of some marker genes with signal from 2kbp upstream and only from those feature regions (peaks) that form clusters seen in UMAP
morello_e12_clusters <- read_tsv("../scRNA_data/Morello_et_al_cluster_anno.tsv")
Rows: 42 Columns: 2
── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (1): clusterName
dbl (1): clusterNumber
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
morello.i <- match(as.vector(s.data_RNA@meta.data$seurat_clusters),morello_e12_clusters$clusterNumber)
s.data_RNA <- AddMetaData(s.data_RNA, pull(morello_e12_clusters[morello.i,2], clusterName), col.name = "clusterAnnotation")
# Converting numerical Seurat clusters to alphabeticals and store in $CellType slot of s.data_rna
# Finding transfer anchors
transfer.anchors <- FindTransferAnchors(
reference = s.data_RNA,
query = s.data,
reduction = 'cca',
reference.assay="RNA",
query.assay = "Activity",
features = VariableFeatures(object=s.data_RNA)
)
Warning: 335 features of the features specified were not present in both the reference query assays.
Continuing with remaining 2665 features.
Warning in RunCCA.Seurat(object1 = reference, object2 = query, features = features, :
Running CCA on different assays
Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 11260 anchors
Filtering anchors
Retained 7005 anchors
# TODO: Find out which object and why there are graphs(4?) without associated assays, this however based on googling doesn't show as meaningful warning()
predicted.labels <- TransferData(
anchorset = transfer.anchors,
refdata = s.data_RNA@meta.data$clusterAnnotation,
weight.reduction = s.data[['lsi']],
dims = 2:30
)
Finding integration vectors
Finding integration vector weights
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Predicting cell labels
s.data <- AddMetaData(object = s.data, metadata = predicted.labels)
Label transfer, following and adapting https://satijalab.org/seurat/v3.0/atacseq_integration_vignette.html
Histogram of prediction scores, in Satija lab integration process each cell get prediction score how well it can be mapped from dataspace to another. Values > 0.5 are considered to be “good”.
prediction.score.over.th <- table(s.data$prediction.score.max > 0.5)
p.freq <- prediction.score.over.th['TRUE']/(prediction.score.over.th['TRUE']+prediction.score.over.th['FALSE'])
p.score.th <- as.numeric(prediction.score.over.th)
val_names <- sprintf("%s (%s)", c("Match not found", "Match found"), scales::percent(round(p.score.th/sum(p.score.th), 2)))
names(p.score.th) <- val_names
waffle::waffle(p.score.th/10,colors = c("#fb8072", "#8dd3c7", "white"), rows=9, size=1)
Waffle plot visualizing proportions of properly mapping cells
#' Select only cells with prediction score over 0.5
s.data.filtered <- subset(s.data, subset = prediction.score.max > 0.5)
#' To make the colors match, TODO: Check why there are few NAs in the predicted ids
#s.data.filtered$predicted.id <- factor(s.data.filtered$predicted.id, levels = letters[as.numeric(levels(s.data_rna))])
# Do combined plotting
p1 <- DimPlot(s.data.filtered, group.by = "predicted.id", label = TRUE, repel = TRUE, label.size=7, pt.size=2) + NoLegend() + scale_colour_hue(drop = FALSE)
p2 <- DimPlot(s.data_RNA, group.by = "clusterAnnotation", label = TRUE, repel = TRUE, label.size=7) + NoLegend()
p1 + ggtitle("scATAC-seq cells, labels predicted from scRNA")
p2 + ggtitle("scRNA-seq cells")
Filtering for cells mapping properly and visualizing cluster labels from scRNA (right side) to scATAC (left side).
Finding integration vectors
Finding integration vector weights
Transfering 27999 features onto reference data
Centering data matrix
11:43:23 UMAP embedding parameters a = 0.9922 b = 1.112
11:43:23 Read 10354 rows and found 29 numeric columns
11:43:23 Using Annoy for neighbor search, n_neighbors = 30
11:43:23 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|
11:43:28 Writing NN index file to temp file /var/folders/yb/wvk8t07s719136klxnc8nmp9sglf93/T//Rtmpnq8Zdj/file72653e114263
11:43:28 Searching Annoy index using 1 thread, search_k = 3000
11:43:32 Annoy recall = 100%
11:43:32 Commencing smooth kNN distance calibration using 1 thread
11:43:35 Initializing from normalized Laplacian + noise
11:43:35 Commencing optimization for 200 epochs, with 432826 positive edges
11:43:45 Optimization finished
p1 <- DimPlot(coembed, group.by = "tech")
p2 <- DimPlot(coembed, group.by = "clusterAnnotation", label = TRUE, repel = TRUE) + theme(legend.position="none")
p1 + p2
Coembed plotting scATAC and scRNA cells together. Usable mainly for validation purposes.
s.data <- subset(s.data, subset = prediction.score.max > 0.5)
DefaultAssay(s.data) <- "peaks"
s.data <- RunUMAP(object = s.data, reduction = 'lsi', dims = 2:30, spread=1.4)
15:30:06 UMAP embedding parameters a = 0.6151 b = 1.019
15:30:06 Read 4329 rows and found 29 numeric columns
15:30:06 Using Annoy for neighbor search, n_neighbors = 30
15:30:06 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:30:07 Writing NN index file to temp file /var/folders/yb/wvk8t07s719136klxnc8nmp9sglf93/T//RtmpDm8Ztu/filef679522af9b5
15:30:07 Searching Annoy index using 6 threads, search_k = 3000
15:30:07 Annoy recall = 100%
15:30:11 Commencing smooth kNN distance calibration using 6 threads
15:30:14 Initializing from normalized Laplacian + noise
15:30:14 Commencing optimization for 500 epochs, with 161262 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
15:30:20 Optimization finished
s.data <- FindNeighbors(object = s.data, reduction = 'lsi', dims = 2:30)
Computing nearest neighbor graph
Computing SNN
s.data <- FindClusters(object = s.data, verbose = FALSE, algorithm = 4, resolution = 1)
DimPlot(object = s.data, label = TRUE, pt.size=1.2) + NoLegend()
neuronal.markers<- read_tsv("../../CellAnnotation/E12.5_cluster_markers_for_ATACseq.txt", col_names = c("annotation","geneName"))
Rows: 73 Columns: 2
── Column specification ──────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): annotation, geneName
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
feature.metadata <- s.data[['Activity']][[]] %>% rownames_to_column(var="gene_id") %>% as_tibble()
neuronal.markers.tmp <- filter(feature.metadata, feature_symbol %in% neuronal.markers$geneName)
neuronal.markers.ids <- pull(neuronal.markers.tmp,"gene_id")
neuronal.markers.names <- pull(neuronal.markers.tmp,"feature_symbol")
DefaultAssay(s.data) <- 'RNA'
f.plot.tmp <- FeaturePlot(
object = s.data,
features = neuronal.markers.ids,
pt.size = 0.1,
max.cutoff = 'q95',
combine = F
)
f.plots.2 <- lapply(1:length(f.plot.tmp),function(i){
f.plot.tmp[[i]] + labs(title=neuronal.markers.names[i])
})
patchwork::wrap_plots(f.plots.2)
Plot marker gene FeaturePlots with imputed scRNA data
# Define a set of HK genes
hk.genes <- c("RRN18S","Actb","Gapdh","Pgk1","Ppia","Rpl13a","Rplp0","Arbp","B2M","Ywhaz","Sdha","Tfrc","Gusb","Hmbs","Hprt1","Tbp")
hk.genes.id <- convert_feature_identity(s.data, "RNA",features = hk.genes)
[1] "Instance: Found matching for 12 features out of total 16 provided features"
neuronal.markers.id <- convert_feature_identity(s.data, "RNA",features = neuronal.markers$geneName)
[1] "Instance: Found matching for 71 features out of total 73 provided features"
# Neuronal marker mean per cluster in Cusanovich data
gene.i <- match(neuronal.markers.id,s.data[['RNA']]@data@Dimnames[[1]])
gene.i<-gene.i[!is.na(gene.i)]
barcode.clusters <- s.data@meta.data$seurat_clusters
marker.matrix <- s.data[['RNA']]@data[gene.i,]
marker.tb <- as_tibble(t(as.data.frame(marker.matrix)))
marker.tb<-tibble(marker.tb,cluster=barcode.clusters)
marker.mean <- list()
marker.mean$mean.by.cluster <- marker.tb %>% group_by(cluster) %>% summarize_all(mean)
# HK gene mean per cluster in Cusanovich data
gene.i <- match(hk.genes.id,s.data[['RNA']]@data@Dimnames[[1]])
gene.i<-gene.i[!is.na(gene.i)]
barcode.clusters <- s.data@meta.data$seurat_clusters
marker.matrix <- s.data[['RNA']]@data[gene.i,]
marker.tb <- as_tibble(t(as.data.frame(marker.matrix)))
marker.tb<-tibble(marker.tb,cluster=barcode.clusters)
hk.mean <- list()
hk.mean$mean.by.cluster <- marker.tb %>% group_by(cluster) %>% summarize_all(mean)
# Gini indeces for Cusanovich data
cus.hk.gini <- apply(hk.mean$mean.by.cluster[,-1],2,gini)
cus.neur.gini <- apply(marker.mean$mean.by.cluster[,-1],2,gini)
gini.tb<-tibble(gini.index=c(cus.hk.gini,cus.neur.gini),type=c(rep("hk",length(cus.hk.gini)),rep("neur",length(cus.neur.gini)))) %>% dplyr::filter(!is.na(gini.index))
ggplot(gini.tb, aes(x=gini.index,y=type,fill="blue"))+geom_boxplot(fill="lightblue")+ theme(legend.position="none") + theme_classic()
Gini index based validation of clustering.
Hocomocov11 <- read_jaspar("../../mm10/HOCOMOCOv11_core_MOUSE_mono_jaspar_format.txt")
names(Hocomocov11) <- lapply(Hocomocov11,function(x){x@name})
Hocomocov11 <- convert_motifs(Hocomocov11, "TFBSTools-PWMatrix")
Note: motif [AHR_M..] has an empty nsites slot, using 100.
Note: motif [AIRE_..] has an empty nsites slot, using 100.
Note: motif [ALX1_..] has an empty nsites slot, using 100.
Note: motif [ANDR_..] has an empty nsites slot, using 100.
Note: motif [AP2A_..] has an empty nsites slot, using 100.
Note: motif [AP2B_..] has an empty nsites slot, using 100.
Note: motif [AP2C_..] has an empty nsites slot, using 100.
Note: motif [ARI5B..] has an empty nsites slot, using 100.
Note: motif [ARNT_..] has an empty nsites slot, using 100.
Note: motif [ASCL1..] has an empty nsites slot, using 100.
Note: motif [ASCL2..] has an empty nsites slot, using 100.
Note: motif [ATF1_..] has an empty nsites slot, using 100.
Note: motif [ATF2_..] has an empty nsites slot, using 100.
Note: motif [ATF3_..] has an empty nsites slot, using 100.
Note: motif [ATF4_..] has an empty nsites slot, using 100.
Note: motif [ATOH1..] has an empty nsites slot, using 100.
Note: motif [BACH1..] has an empty nsites slot, using 100.
Note: motif [BACH2..] has an empty nsites slot, using 100.
Note: motif [BARX1..] has an empty nsites slot, using 100.
Note: motif [BATF3..] has an empty nsites slot, using 100.
Note: motif [BATF_..] has an empty nsites slot, using 100.
Note: motif [BCL6_..] has an empty nsites slot, using 100.
Note: motif [BHA15..] has an empty nsites slot, using 100.
Note: motif [BHE40..] has an empty nsites slot, using 100.
Note: motif [BMAL1..] has an empty nsites slot, using 100.
Note: motif [BRAC_..] has an empty nsites slot, using 100.
Note: motif [CDX1_..] has an empty nsites slot, using 100.
Note: motif [CDX2_..] has an empty nsites slot, using 100.
Note: motif [CEBPA..] has an empty nsites slot, using 100.
Note: motif [CEBPB..] has an empty nsites slot, using 100.
Note: motif [CEBPD..] has an empty nsites slot, using 100.
Note: motif [CEBPE..] has an empty nsites slot, using 100.
Note: motif [CEBPG..] has an empty nsites slot, using 100.
Note: motif [CLOCK..] has an empty nsites slot, using 100.
Note: motif [COE1_..] has an empty nsites slot, using 100.
Note: motif [COT1_..] has an empty nsites slot, using 100.
Note: motif [COT2_..] has an empty nsites slot, using 100.
Note: motif [CREB1..] has an empty nsites slot, using 100.
Note: motif [CREM_..] has an empty nsites slot, using 100.
Note: motif [CRX_M..] has an empty nsites slot, using 100.
Note: motif [CTCFL..] has an empty nsites slot, using 100.
Note: motif [CTCF_..] has an empty nsites slot, using 100.
Note: motif [CUX1_..] has an empty nsites slot, using 100.
Note: motif [CUX2_..] has an empty nsites slot, using 100.
Note: motif [DBP_M..] has an empty nsites slot, using 100.
Note: motif [DDIT3..] has an empty nsites slot, using 100.
Note: motif [DLX3_..] has an empty nsites slot, using 100.
Note: motif [DLX5_..] has an empty nsites slot, using 100.
Note: motif [DMRT1..] has an empty nsites slot, using 100.
Note: motif [DMRTB..] has an empty nsites slot, using 100.
Note: motif [E2F1_..] has an empty nsites slot, using 100.
Note: motif [E2F2_..] has an empty nsites slot, using 100.
Note: motif [E2F3_..] has an empty nsites slot, using 100.
Note: motif [E2F4_..] has an empty nsites slot, using 100.
Note: motif [E2F5_..] has an empty nsites slot, using 100.
Note: motif [E2F6_..] has an empty nsites slot, using 100.
Note: motif [E2F7_..] has an empty nsites slot, using 100.
Note: motif [EGR1_..] has an empty nsites slot, using 100.
Note: motif [EGR2_..] has an empty nsites slot, using 100.
Note: motif [EHF_M..] has an empty nsites slot, using 100.
Note: motif [ELF1_..] has an empty nsites slot, using 100.
Note: motif [ELF2_..] has an empty nsites slot, using 100.
Note: motif [ELF3_..] has an empty nsites slot, using 100.
Note: motif [ELF5_..] has an empty nsites slot, using 100.
Note: motif [ELK1_..] has an empty nsites slot, using 100.
Note: motif [ELK4_..] has an empty nsites slot, using 100.
Note: motif [EPAS1..] has an empty nsites slot, using 100.
Note: motif [ERG_M..] has an empty nsites slot, using 100.
Note: motif [ERR1_..] has an empty nsites slot, using 100.
Note: motif [ERR2_..] has an empty nsites slot, using 100.
Note: motif [ERR3_..] has an empty nsites slot, using 100.
Note: motif [ESR1_..] has an empty nsites slot, using 100.
Note: motif [ESR2_..] has an empty nsites slot, using 100.
Note: motif [ETS1_..] has an empty nsites slot, using 100.
Note: motif [ETS2_..] has an empty nsites slot, using 100.
Note: motif [ETV2_..] has an empty nsites slot, using 100.
Note: motif [ETV4_..] has an empty nsites slot, using 100.
Note: motif [ETV6_..] has an empty nsites slot, using 100.
Note: motif [EVI1_..] has an empty nsites slot, using 100.
Note: motif [FEV_M..] has an empty nsites slot, using 100.
Note: motif [FLI1_..] has an empty nsites slot, using 100.
Note: motif [FOSB_..] has an empty nsites slot, using 100.
Note: motif [FOSL1..] has an empty nsites slot, using 100.
Note: motif [FOSL2..] has an empty nsites slot, using 100.
Note: motif [FOS_M..] has an empty nsites slot, using 100.
Note: motif [FOXA1..] has an empty nsites slot, using 100.
Note: motif [FOXA2..] has an empty nsites slot, using 100.
Note: motif [FOXA3..] has an empty nsites slot, using 100.
Note: motif [FOXC1..] has an empty nsites slot, using 100.
Note: motif [FOXD3..] has an empty nsites slot, using 100.
Note: motif [FOXI1..] has an empty nsites slot, using 100.
Note: motif [FOXJ2..] has an empty nsites slot, using 100.
Note: motif [FOXJ3..] has an empty nsites slot, using 100.
Note: motif [FOXK1..] has an empty nsites slot, using 100.
Note: motif [FOXL2..] has an empty nsites slot, using 100.
Note: motif [FOXM1..] has an empty nsites slot, using 100.
Note: motif [FOXO1..] has an empty nsites slot, using 100.
Note: motif [FOXO3..] has an empty nsites slot, using 100.
Note: motif [FOXO4..] has an empty nsites slot, using 100.
Note: motif [FOXP2..] has an empty nsites slot, using 100.
Note: motif [FOXQ1..] has an empty nsites slot, using 100.
Note: motif [GABPA..] has an empty nsites slot, using 100.
Note: motif [GATA1..] has an empty nsites slot, using 100.
Note: motif [GATA2..] has an empty nsites slot, using 100.
Note: motif [GATA3..] has an empty nsites slot, using 100.
Note: motif [GATA4..] has an empty nsites slot, using 100.
Note: motif [GATA6..] has an empty nsites slot, using 100.
Note: motif [GCR_M..] has an empty nsites slot, using 100.
Note: motif [GFI1B..] has an empty nsites slot, using 100.
Note: motif [GFI1_..] has an empty nsites slot, using 100.
Note: motif [GLI1_..] has an empty nsites slot, using 100.
Note: motif [GRHL2..] has an empty nsites slot, using 100.
Note: motif [HAND1..] has an empty nsites slot, using 100.
Note: motif [HEN1_..] has an empty nsites slot, using 100.
Note: motif [HIC1_..] has an empty nsites slot, using 100.
Note: motif [HIF1A..] has an empty nsites slot, using 100.
Note: motif [HINFP..] has an empty nsites slot, using 100.
Note: motif [HLF_M..] has an empty nsites slot, using 100.
Note: motif [HNF1A..] has an empty nsites slot, using 100.
Note: motif [HNF1B..] has an empty nsites slot, using 100.
Note: motif [HNF4A..] has an empty nsites slot, using 100.
Note: motif [HNF4G..] has an empty nsites slot, using 100.
Note: motif [HNF6_..] has an empty nsites slot, using 100.
Note: motif [HSF1_..] has an empty nsites slot, using 100.
Note: motif [HSF2_..] has an empty nsites slot, using 100.
Note: motif [HTF4_..] has an empty nsites slot, using 100.
Note: motif [HXA10..] has an empty nsites slot, using 100.
Note: motif [HXA13..] has an empty nsites slot, using 100.
Note: motif [HXA1_..] has an empty nsites slot, using 100.
Note: motif [HXA9_..] has an empty nsites slot, using 100.
Note: motif [HXB4_..] has an empty nsites slot, using 100.
Note: motif [HXB7_..] has an empty nsites slot, using 100.
Note: motif [HXB8_..] has an empty nsites slot, using 100.
Note: motif [HXC9_..] has an empty nsites slot, using 100.
Note: motif [IKZF1..] has an empty nsites slot, using 100.
Note: motif [INSM1..] has an empty nsites slot, using 100.
Note: motif [IRF1_..] has an empty nsites slot, using 100.
Note: motif [IRF2_..] has an empty nsites slot, using 100.
Note: motif [IRF3_..] has an empty nsites slot, using 100.
Note: motif [IRF4_..] has an empty nsites slot, using 100.
Note: motif [IRF7_..] has an empty nsites slot, using 100.
Note: motif [IRF8_..] has an empty nsites slot, using 100.
Note: motif [IRF9_..] has an empty nsites slot, using 100.
Note: motif [ISL1_..] has an empty nsites slot, using 100.
Note: motif [ITF2_..] has an empty nsites slot, using 100.
Note: motif [JUNB_..] has an empty nsites slot, using 100.
Note: motif [JUND_..] has an empty nsites slot, using 100.
Note: motif [JUN_M..] has an empty nsites slot, using 100.
Note: motif [KAISO..] has an empty nsites slot, using 100.
Note: motif [KLF15..] has an empty nsites slot, using 100.
Note: motif [KLF1_..] has an empty nsites slot, using 100.
Note: motif [KLF3_..] has an empty nsites slot, using 100.
Note: motif [KLF4_..] has an empty nsites slot, using 100.
Note: motif [KLF5_..] has an empty nsites slot, using 100.
Note: motif [KLF6_..] has an empty nsites slot, using 100.
Note: motif [KLF8_..] has an empty nsites slot, using 100.
Note: motif [LEF1_..] has an empty nsites slot, using 100.
Note: motif [LHX2_..] has an empty nsites slot, using 100.
Note: motif [LHX3_..] has an empty nsites slot, using 100.
Note: motif [LHX6_..] has an empty nsites slot, using 100.
Note: motif [LYL1_..] has an empty nsites slot, using 100.
Note: motif [MAFB_..] has an empty nsites slot, using 100.
Note: motif [MAFF_..] has an empty nsites slot, using 100.
Note: motif [MAFG_..] has an empty nsites slot, using 100.
Note: motif [MAFK_..] has an empty nsites slot, using 100.
Note: motif [MAF_M..] has an empty nsites slot, using 100.
Note: motif [MAX_M..] has an empty nsites slot, using 100.
Note: motif [MAZ_M..] has an empty nsites slot, using 100.
Note: motif [MBD2_..] has an empty nsites slot, using 100.
Note: motif [MECP2..] has an empty nsites slot, using 100.
Note: motif [MEF2A..] has an empty nsites slot, using 100.
Note: motif [MEF2C..] has an empty nsites slot, using 100.
Note: motif [MEF2D..] has an empty nsites slot, using 100.
Note: motif [MEIS1..] has an empty nsites slot, using 100.
Note: motif [MEIS2..] has an empty nsites slot, using 100.
Note: motif [MITF_..] has an empty nsites slot, using 100.
Note: motif [MSGN1..] has an empty nsites slot, using 100.
Note: motif [MTF1_..] has an empty nsites slot, using 100.
Note: motif [MXI1_..] has an empty nsites slot, using 100.
Note: motif [MYBA_..] has an empty nsites slot, using 100.
Note: motif [MYB_M..] has an empty nsites slot, using 100.
Note: motif [MYCN_..] has an empty nsites slot, using 100.
Note: motif [MYC_M..] has an empty nsites slot, using 100.
Note: motif [MYF6_..] has an empty nsites slot, using 100.
Note: motif [MYOD1..] has an empty nsites slot, using 100.
Note: motif [MYOG_..] has an empty nsites slot, using 100.
Note: motif [NANOG..] has an empty nsites slot, using 100.
Note: motif [NDF1_..] has an empty nsites slot, using 100.
Note: motif [NDF2_..] has an empty nsites slot, using 100.
Note: motif [NF2L1..] has an empty nsites slot, using 100.
Note: motif [NF2L2..] has an empty nsites slot, using 100.
Note: motif [NFAC1..] has an empty nsites slot, using 100.
Note: motif [NFAC2..] has an empty nsites slot, using 100.
Note: motif [NFAC3..] has an empty nsites slot, using 100.
Note: motif [NFAC4..] has an empty nsites slot, using 100.
Note: motif [NFE2_..] has an empty nsites slot, using 100.
Note: motif [NFIA_..] has an empty nsites slot, using 100.
Note: motif [NFIB_..] has an empty nsites slot, using 100.
Note: motif [NFIC_..] has an empty nsites slot, using 100.
Note: motif [NFIL3..] has an empty nsites slot, using 100.
Note: motif [NFKB1..] has an empty nsites slot, using 100.
Note: motif [NFKB2..] has an empty nsites slot, using 100.
Note: motif [NFYA_..] has an empty nsites slot, using 100.
Note: motif [NFYB_..] has an empty nsites slot, using 100.
Note: motif [NFYC_..] has an empty nsites slot, using 100.
Note: motif [NGN2_..] has an empty nsites slot, using 100.
Note: motif [NKX21..] has an empty nsites slot, using 100.
Note: motif [NKX22..] has an empty nsites slot, using 100.
Note: motif [NKX25..] has an empty nsites slot, using 100.
Note: motif [NKX28..] has an empty nsites slot, using 100.
Note: motif [NKX31..] has an empty nsites slot, using 100.
Note: motif [NKX32..] has an empty nsites slot, using 100.
Note: motif [NKX61..] has an empty nsites slot, using 100.
Note: motif [NOBOX..] has an empty nsites slot, using 100.
Note: motif [NR1D1..] has an empty nsites slot, using 100.
Note: motif [NR1D2..] has an empty nsites slot, using 100.
Note: motif [NR1H3..] has an empty nsites slot, using 100.
Note: motif [NR1H4..] has an empty nsites slot, using 100.
Note: motif [NR1I2..] has an empty nsites slot, using 100.
Note: motif [NR1I3..] has an empty nsites slot, using 100.
Note: motif [NR2C1..] has an empty nsites slot, using 100.
Note: motif [NR2C2..] has an empty nsites slot, using 100.
Note: motif [NR2E3..] has an empty nsites slot, using 100.
Note: motif [NR4A1..] has an empty nsites slot, using 100.
Note: motif [NR4A2..] has an empty nsites slot, using 100.
Note: motif [NR5A2..] has an empty nsites slot, using 100.
Note: motif [NRF1_..] has an empty nsites slot, using 100.
Note: motif [OLIG2..] has an empty nsites slot, using 100.
Note: motif [OTX2_..] has an empty nsites slot, using 100.
Note: motif [OVOL1..] has an empty nsites slot, using 100.
Note: motif [OVOL2..] has an empty nsites slot, using 100.
Note: motif [P53_M..] has an empty nsites slot, using 100.
Note: motif [P63_M..] has an empty nsites slot, using 100.
Note: motif [P73_M..] has an empty nsites slot, using 100.
Note: motif [PAX5_..] has an empty nsites slot, using 100.
Note: motif [PAX6_..] has an empty nsites slot, using 100.
Note: motif [PBX1_..] has an empty nsites slot, using 100.
Note: motif [PBX2_..] has an empty nsites slot, using 100.
Note: motif [PBX3_..] has an empty nsites slot, using 100.
Note: motif [PDX1_..] has an empty nsites slot, using 100.
Note: motif [PEBB_..] has an empty nsites slot, using 100.
Note: motif [PIT1_..] has an empty nsites slot, using 100.
Note: motif [PITX1..] has an empty nsites slot, using 100.
Note: motif [PKNX1..] has an empty nsites slot, using 100.
Note: motif [PO2F1..] has an empty nsites slot, using 100.
Note: motif [PO2F2..] has an empty nsites slot, using 100.
Note: motif [PO3F1..] has an empty nsites slot, using 100.
Note: motif [PO3F2..] has an empty nsites slot, using 100.
Note: motif [PO5F1..] has an empty nsites slot, using 100.
Note: motif [PPARA..] has an empty nsites slot, using 100.
Note: motif [PPARG..] has an empty nsites slot, using 100.
Note: motif [PRD14..] has an empty nsites slot, using 100.
Note: motif [PRD16..] has an empty nsites slot, using 100.
Note: motif [PRDM1..] has an empty nsites slot, using 100.
Note: motif [PRDM5..] has an empty nsites slot, using 100.
Note: motif [PRDM9..] has an empty nsites slot, using 100.
Note: motif [PRGR_..] has an empty nsites slot, using 100.
Note: motif [PROP1..] has an empty nsites slot, using 100.
Note: motif [PRRX2..] has an empty nsites slot, using 100.
Note: motif [PTF1A..] has an empty nsites slot, using 100.
Note: motif [RARA_..] has an empty nsites slot, using 100.
Note: motif [RARG_..] has an empty nsites slot, using 100.
Note: motif [RELB_..] has an empty nsites slot, using 100.
Note: motif [REL_M..] has an empty nsites slot, using 100.
Note: motif [REST_..] has an empty nsites slot, using 100.
Note: motif [RFX1_..] has an empty nsites slot, using 100.
Note: motif [RFX2_..] has an empty nsites slot, using 100.
Note: motif [RFX3_..] has an empty nsites slot, using 100.
Note: motif [RFX6_..] has an empty nsites slot, using 100.
Note: motif [RORA_..] has an empty nsites slot, using 100.
Note: motif [RORG_..] has an empty nsites slot, using 100.
Note: motif [RUNX1..] has an empty nsites slot, using 100.
Note: motif [RUNX2..] has an empty nsites slot, using 100.
Note: motif [RUNX3..] has an empty nsites slot, using 100.
Note: motif [RXRA_..] has an empty nsites slot, using 100.
Note: motif [RXRB_..] has an empty nsites slot, using 100.
Note: motif [RXRG_..] has an empty nsites slot, using 100.
Note: motif [SALL4..] has an empty nsites slot, using 100.
Note: motif [SIX2_..] has an empty nsites slot, using 100.
Note: motif [SIX4_..] has an empty nsites slot, using 100.
Note: motif [SMAD2..] has an empty nsites slot, using 100.
Note: motif [SMAD3..] has an empty nsites slot, using 100.
Note: motif [SMAD4..] has an empty nsites slot, using 100.
Note: motif [SMCA5..] has an empty nsites slot, using 100.
Note: motif [SNAI1..] has an empty nsites slot, using 100.
Note: motif [SNAI2..] has an empty nsites slot, using 100.
Note: motif [SOX10..] has an empty nsites slot, using 100.
Note: motif [SOX2_..] has an empty nsites slot, using 100.
Note: motif [SOX3_..] has an empty nsites slot, using 100.
Note: motif [SOX4_..] has an empty nsites slot, using 100.
Note: motif [SOX5_..] has an empty nsites slot, using 100.
Note: motif [SOX9_..] has an empty nsites slot, using 100.
Note: motif [SP1_M..] has an empty nsites slot, using 100.
Note: motif [SP2_M..] has an empty nsites slot, using 100.
Note: motif [SP3_M..] has an empty nsites slot, using 100.
Note: motif [SP4_M..] has an empty nsites slot, using 100.
Note: motif [SP5_M..] has an empty nsites slot, using 100.
Note: motif [SP7_M..] has an empty nsites slot, using 100.
Note: motif [SPI1_..] has an empty nsites slot, using 100.
Note: motif [SPIB_..] has an empty nsites slot, using 100.
Note: motif [SRBP1..] has an empty nsites slot, using 100.
Note: motif [SRBP2..] has an empty nsites slot, using 100.
Note: motif [SRF_M..] has an empty nsites slot, using 100.
Note: motif [SRY_M..] has an empty nsites slot, using 100.
Note: motif [STA5A..] has an empty nsites slot, using 100.
Note: motif [STA5B..] has an empty nsites slot, using 100.
Note: motif [STAT1..] has an empty nsites slot, using 100.
Note: motif [STAT2..] has an empty nsites slot, using 100.
Note: motif [STAT3..] has an empty nsites slot, using 100.
Note: motif [STAT4..] has an empty nsites slot, using 100.
Note: motif [STAT6..] has an empty nsites slot, using 100.
Note: motif [STF1_..] has an empty nsites slot, using 100.
Note: motif [SUH_M..] has an empty nsites slot, using 100.
Note: motif [TAF1_..] has an empty nsites slot, using 100.
Note: motif [TAL1_..] has an empty nsites slot, using 100.
Note: motif [TBP_M..] has an empty nsites slot, using 100.
Note: motif [TBX20..] has an empty nsites slot, using 100.
Note: motif [TBX21..] has an empty nsites slot, using 100.
Note: motif [TBX3_..] has an empty nsites slot, using 100.
Note: motif [TCF7_..] has an empty nsites slot, using 100.
Note: motif [TEAD1..] has an empty nsites slot, using 100.
Note: motif [TEAD2..] has an empty nsites slot, using 100.
Note: motif [TEAD4..] has an empty nsites slot, using 100.
Note: motif [TF2L1..] has an empty nsites slot, using 100.
Note: motif [TF65_..] has an empty nsites slot, using 100.
Note: motif [TF7L1..] has an empty nsites slot, using 100.
Note: motif [TF7L2..] has an empty nsites slot, using 100.
Note: motif [TFE2_..] has an empty nsites slot, using 100.
Note: motif [TFE3_..] has an empty nsites slot, using 100.
Note: motif [TFEB_..] has an empty nsites slot, using 100.
Note: motif [TGIF1..] has an empty nsites slot, using 100.
Note: motif [THA11..] has an empty nsites slot, using 100.
Note: motif [THA_M..] has an empty nsites slot, using 100.
Note: motif [TWST1..] has an empty nsites slot, using 100.
Note: motif [TYY1_..] has an empty nsites slot, using 100.
Note: motif [USF1_..] has an empty nsites slot, using 100.
Note: motif [USF2_..] has an empty nsites slot, using 100.
Note: motif [VDR_M..] has an empty nsites slot, using 100.
Note: motif [VSX2_..] has an empty nsites slot, using 100.
Note: motif [WT1_M..] has an empty nsites slot, using 100.
Note: motif [XBP1_..] has an empty nsites slot, using 100.
Note: motif [ZBT17..] has an empty nsites slot, using 100.
Note: motif [ZBT7A..] has an empty nsites slot, using 100.
Note: motif [ZEB1_..] has an empty nsites slot, using 100.
Note: motif [ZFP42..] has an empty nsites slot, using 100.
Note: motif [ZFP57..] has an empty nsites slot, using 100.
Note: motif [ZFX_M..] has an empty nsites slot, using 100.
Note: motif [ZIC1_..] has an empty nsites slot, using 100.
Note: motif [ZIC2_..] has an empty nsites slot, using 100.
Note: motif [ZIC3_..] has an empty nsites slot, using 100.
Note: motif [ZKSC1..] has an empty nsites slot, using 100.
Note: motif [ZN143..] has an empty nsites slot, using 100.
Note: motif [ZN281..] has an empty nsites slot, using 100.
Note: motif [ZN322..] has an empty nsites slot, using 100.
Note: motif [ZN335..] has an empty nsites slot, using 100.
Note: motif [ZN431..] has an empty nsites slot, using 100.
PWMs <- do.call(PWMatrixList,Hocomocov11)
DefaultAssay(s.data) <- "peaks"
# add motif information
s.data <- Signac::AddMotifs(
object = s.data,
genome = BSgenome.Mmusculus.UCSC.mm10,
pfm = PWMs
)
Building motif matrix
Finding motif positions
Creating Motif object
DefaultAssay(s.data) <- "peaks"
closest.features <- ClosestFeature(s.data, regions = rownames(s.data))
saveRDS(closest.features, file="../analyses/E12R1_nmm_closest_features.271021.Rds")
s.data <- RunChromVAR(
object = s.data,
genome = BSgenome.Mmusculus.UCSC.mm10
)
Computing GC bias per region
Selecting background regions
Computing deviations from background
Constructing chromVAR assay
Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
# We need to run detection separately for both modality and then combine via AUC score
print("Running presto::wilcoxauc for RNA modality")
[1] "Running presto::wilcoxauc for RNA modality"
DefaultAssay(s.data) <- "RNA"
markers_rna <- presto:::wilcoxauc.Seurat(X = s.data, group_by = "seurat_clusters", assay = 'data', seurat_assay = 'RNA')
print("Running presto::wilcoxauc for ATAC modality")
[1] "Running presto::wilcoxauc for ATAC modality"
DefaultAssay(s.data) <- "peaks"
markers_atac <- presto:::wilcoxauc.Seurat(X = s.data, group_by = "seurat_clusters", assay = 'data', seurat_assay = 'peaks')
markers.atac.annotated <- as_tibble(cbind(markers_atac, closest.features))
Warning in data.frame(..., check.names = FALSE) :
row names were found from a short variable and have been discarded
# Then we need to 1) annotate ATAC features 2) combine with RNA modality 3) Think of its presentation
#saveRDS(atac.expression.markers, file = paste("../analyses/", sample.name,".atac.expression.markers.mm.Rds",sep=""))
motif.markers <- markers.atac.annotated %>% filter(logFC > 0.25 & padj <= 0.01) %>% group_by(group) %>% select(feature, group) %>% group_modify(~FindMotifs(object=s.data, features=.x$feature)) %>% filter(pvalue <= 0.01 & fold.enrichment >= 1.5)
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 1163 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 1880 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 666 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 709 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 4543 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 1980 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 2459 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 2252 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 1063 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 1309 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 2916 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 437 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 2067 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 4766 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 2991 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 4077 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 1788 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 2006 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 3518 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 3443 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 1852 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 1162 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 4499 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 2633 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 1217 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 1571 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 1044 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 2907 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 805 regions
Selecting background regions to match input sequence characteristics
Matching GC.percent distribution
Testing motif enrichment in 1213 regions
DefaultAssay(s.data) <- "chromvar"
markers_chromvar <- as_tibble(FindAllMarkers(
object = s.data,
only.pos = TRUE,
test.use = 'LR',
latent.vars = 'nCount_peaks'
)) %>% filter(p_val_adj <= 0.01 & avg_log2FC >= 0.75)
Calculating cluster 1
Calculating cluster 2
Calculating cluster 3
Calculating cluster 4
Calculating cluster 5
Calculating cluster 6
Calculating cluster 7
Calculating cluster 8
Calculating cluster 9
Calculating cluster 10
Calculating cluster 11
Calculating cluster 12
Calculating cluster 13
Calculating cluster 14
Calculating cluster 15
Calculating cluster 16
Calculating cluster 17
Calculating cluster 18
Calculating cluster 19
Calculating cluster 20
Calculating cluster 21
Calculating cluster 22
Calculating cluster 23
Calculating cluster 24
Calculating cluster 25
Calculating cluster 26
Calculating cluster 27
Calculating cluster 28
Calculating cluster 29
Calculating cluster 30
# Now we need to combine markers_rna, markers_atac, markers_chromvar, motif.markers in meaningful output to help with cluster annotation
# Writing marker info out to be used separately, writing out top 100
top.markers_rna <- as_tibble(markers_rna) %>% dplyr::filter(padj <= 0.01) %>% group_by(group) %>% top_n(n = 25, wt = logFC)
top.markers_rna <- left_join(top.markers_rna, feature.metadata.rna, by=c("feature"="gene_id"))
top.markers_atac <- as_tibble(markers.atac.annotated) %>% dplyr::filter(padj <= 0.01) %>% group_by(group) %>% top_n(n = 25, wt = logFC)
top.markers_chromvar <- as_tibble(markers_chromvar) %>% dplyr::filter(p_val_adj <= 0.01) %>% group_by(cluster) %>% top_n(n = 25, wt = avg_log2FC)
top.markers_motifs <- as_tibble(motif.markers) %>% dplyr::filter(pvalue <= 0.01) %>% group_by(group) %>% top_n(n = 25, wt = fold.enrichment)
save(list=c("top.markers_rna","top.markers_atac","top.markers_chromvar","top.markers_motifs"), file=paste("../analyses/e12R1_nmm_scATAC_cluster_markers.",run.date,".RData",sep=""))
saveRDS(s.data,paste("../scATAC_data/",sample.name,"_DownstreamReady_nmm_.",run.date,".Rds",sep=""))
s.data.slim <- s.data
DefaultAssay(s.data.slim) <- "peaks"
s.data.slim[['peaks_count']] <- NULL
s.data.slim[['Activity']] <- NULL
saveRDS(s.data.slim,paste("../scATAC_data/",sample.name,"_DownstreamReady_nmm_slim.",run.date,".Rds",sep=""))
sessionInfo()
R version 4.1.1 (2021-08-10)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: macOS Catalina 10.15.7
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Users/kilpinen/opt/anaconda3/envs/r411_291021/lib/libopenblasp-r0.3.18.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] splines grid stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] universalmotif_1.12.4 learnMotifs_0.1.0 cicero_1.12.0
[4] Gviz_1.38.3 monocle_2.22.0 DDRTree_0.1.5
[7] VGAM_1.1-5 future_1.22.1 motifmatchr_1.16.0
[10] flexdashboard_0.5.2 ggplotify_0.1.0 waffle_0.7.0
[13] plotly_4.10.0 pheatmap_1.0.12 org.Mm.eg.db_3.14.0
[16] EnsDb.Mmusculus.v79_2.99.0 ensembldb_2.18.0 AnnotationFilter_1.18.0
[19] GenomicFeatures_1.46.1 AnnotationDbi_1.56.0 Biobase_2.54.0
[22] BSgenome.Mmusculus.UCSC.mm10_1.4.3 BSgenome_1.62.0 rtracklayer_1.54.0
[25] Biostrings_2.62.0 XVector_0.34.0 TFBSTools_1.32.0
[28] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7
[31] purrr_0.3.4 readr_2.0.2 tidyr_1.1.4
[34] tibble_3.1.5 ggplot2_3.3.5 tidyverse_1.3.1
[37] chromVAR_1.16.0 SeuratWrappers_0.3.0 reldist_1.6-6
[40] regioneR_1.26.0 SeuratObject_4.0.2 Seurat_4.0.5
[43] Signac_1.4.0 gridExtra_2.3 fastcluster_1.2.3
[46] RColorBrewer_1.1-2 clusterProfiler_4.2.0 umap_0.2.7.0
[49] irlba_2.3.3 densityClust_0.3 genomation_1.26.0
[52] Rtsne_0.15 proxy_0.4-26 Matrix_1.3-4
[55] gplots_3.1.1 plyr_1.8.6 data.table_1.14.2
[58] GenomicRanges_1.46.1 GenomeInfoDb_1.30.0 IRanges_2.28.0
[61] S4Vectors_0.32.0 BiocGenerics_0.40.0 DT_0.19
[64] viridisLite_0.4.0 pacman_0.5.1
loaded via a namespace (and not attached):
[1] graphlayouts_0.7.1 pbapply_1.5-0 lattice_0.20-45
[4] haven_2.4.3 vctrs_0.3.8 fastICA_1.2-3
[7] mgcv_1.8-38 blob_1.2.2 survival_3.2-13
[10] spatstat.data_2.1-0 later_1.3.0 DBI_1.1.1
[13] R.utils_2.11.0 rappdirs_0.3.3 uwot_0.1.10
[16] jpeg_0.1-9 tensorflow_2.8.0 zlibbioc_1.40.0
[19] htmlwidgets_1.5.4 leiden_0.3.9 parallel_4.1.1
[22] tidygraph_1.2.0 Rcpp_1.0.7 KernSmooth_2.23-20
[25] promises_1.2.0.1 DelayedArray_0.20.0 limma_3.50.0
[28] Hmisc_4.6-0 RSpectra_0.16-0 fs_1.5.0
[31] fastmatch_1.1-3 presto_1.0.0 digest_0.6.28
[34] png_0.1-7 qlcMatrix_0.9.7 sctransform_0.3.2
[37] scatterpie_0.1.7 cowplot_1.1.1 DOSE_3.20.0
[40] ggraph_2.0.5 pkgconfig_2.0.3 GO.db_3.14.0
[43] docopt_0.7.1 gridBase_0.4-7 reticulate_1.24
[46] SummarizedExperiment_1.24.0 xfun_0.27 bslib_0.3.1
[49] zoo_1.8-9 tidyselect_1.1.1 reshape2_1.4.4
[52] ica_1.0-2 rlang_0.4.12 jquerylib_0.1.4
[55] glue_1.4.2 modelr_0.1.8 CNEr_1.30.0
[58] matrixStats_0.61.0 MatrixGenerics_1.6.0 ggseqlogo_0.1
[61] labeling_0.4.2 httpuv_1.6.3 Rttf2pt1_1.3.9
[64] seqLogo_1.60.0 DO.db_2.9 annotate_1.72.0
[67] jsonlite_1.7.2 bit_4.0.4 mime_0.12
[70] nabor_0.5.0 Rsamtools_2.10.0 stringi_1.7.5
[73] RcppRoll_0.3.0 spatstat.sparse_2.0-0 scattermore_0.7
[76] yulab.utils_0.0.4 bitops_1.0-7 cli_3.1.0
[79] RSQLite_2.2.8 rstudioapi_0.13 GenomicAlignments_1.30.0
[82] nlme_3.1-153 qvalue_2.26.0 VariantAnnotation_1.40.0
[85] listenv_0.8.0 SnowballC_0.7.0 miniUI_0.1.1.1
[88] gridGraphics_0.5-1 R.oo_1.24.0 dbplyr_2.1.1
[91] readxl_1.3.1 lifecycle_1.0.1 munsell_0.5.0
[94] cellranger_1.1.0 R.methodsS3_1.8.1 caTools_1.18.2
[97] codetools_0.2-18 lmtest_0.9-38 htmlTable_2.3.0
[100] xtable_1.8-4 ROCR_1.0-11 BiocManager_1.30.16
[103] abind_1.4-5 farver_2.1.0 FNN_1.1.3
[106] parallelly_1.28.1 RANN_2.6.1 aplot_0.1.1
[109] askpass_1.1 biovizBase_1.42.0 poweRlaw_0.70.6
[112] sparsesvd_0.2 ggtree_3.2.0 BiocIO_1.4.0
[115] keras_2.8.0 RcppAnnoy_0.0.19 goftest_1.2-3
[118] patchwork_1.1.1 dichromat_2.0-0 cluster_2.1.2
[121] future.apply_1.8.1 zeallot_0.1.0 extrafontdb_1.0
[124] tidytree_0.3.5 ellipsis_0.3.2 prettyunits_1.1.1
[127] lubridate_1.8.0 ggridges_0.5.3 reprex_2.0.1
[130] igraph_1.2.7 fgsea_1.20.0 remotes_2.4.1
[133] slam_0.1-48 seqPattern_1.26.0 spatstat.utils_2.2-0
[136] htmltools_0.5.2 BiocFileCache_2.2.0 yaml_2.2.1
[139] utf8_1.2.2 XML_3.99-0.8 foreign_0.8-81
[142] withr_2.4.2 fitdistrplus_1.1-6 BiocParallel_1.28.0
[145] bit64_4.0.5 ProtGenerics_1.26.0 spatstat.core_2.3-0
[148] combinat_0.0-8 GOSemSim_2.20.0 rsvd_1.0.5
[151] memoise_2.0.0 evaluate_0.14 tzdb_0.2.0
[154] extrafont_0.17 curl_4.3.2 fansi_0.5.0
[157] tensor_1.5 checkmate_2.0.0 cachem_1.0.6
[160] deldir_1.0-6 impute_1.68.0 rjson_0.2.20
[163] ggrepel_0.9.1 tools_4.1.1 sass_0.4.0
[166] magrittr_2.0.1 RCurl_1.98-1.5 TFMPvalue_0.0.8
[169] ape_5.5 xml2_1.3.2 httr_1.4.2
[172] assertthat_0.2.1 rmarkdown_2.11 globals_0.14.0
[175] R6_2.5.1 nnet_7.3-16 DirichletMultinomial_1.36.0
[178] progress_1.2.2 KEGGREST_1.34.0 treeio_1.18.0
[181] gtools_3.9.2 lsa_0.73.2 ggfun_0.0.4
[184] colorspace_2.0-2 generics_0.1.1 base64enc_0.1-3
[187] pracma_2.3.3 pillar_1.6.4 tweenr_1.0.2
[190] HSMMSingleCell_1.13.0 GenomeInfoDbData_1.2.7 gtable_0.3.0
[193] rvest_1.0.2 restfulr_0.0.13 knitr_1.36
[196] latticeExtra_0.6-29 shadowtext_0.0.9 biomaRt_2.50.0
[199] fastmap_1.1.0 crosstalk_1.1.1 tfruns_1.5.0
[202] broom_0.7.9 openssl_1.4.5 scales_1.1.1
[205] filelock_1.0.2 backports_1.3.0 plotrix_3.8-2
[208] vroom_1.5.5 enrichplot_1.14.0 hms_1.1.1
[211] ggforce_0.3.3 shiny_1.7.1 polyclip_1.10-0
[214] lazyeval_0.2.2 Formula_1.2-4 whisker_0.4
[217] crayon_1.4.1 MASS_7.3-54 downloader_0.4
[220] viridis_0.6.2 rpart_4.1-15 compiler_4.1.1
[223] spatstat.geom_2.3-0
scATAC.clusters <- Idents(s.data)
scRNA.clusters <- s.data@meta.data$predicted.id
conf.mat <- table(as.factor(scRNA.clusters),scATAC.clusters)
create_dt(as.data.frame.matrix(conf.mat))
Cross-tabulation (confusion matrix) in to what extent each scRNA based cell type is included in each scATAC cluster.
# create_dt(as.data.frame.matrix(d.plot$data))
# top.20.markers.by.cluster <- e14di.atc.expression.markers %>% group_by(cluster) %>% top_n(n = 20, wt = avg_log2FC)
# create_dt(as.data.frame.matrix(data.frame(top.20.markers.by.cluster)))
Marker table filtered for convenience having top 20 genes pre cluster
# clean_top20_markers <- read_tsv(scRNA_clean_markers_file)
# create_dt(clean_top20_markers)